# Notebook setup: plotting libraries, data-science imports and display options.
# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly
import cufflinks
# Data science imports
import pandas as pd
import numpy as np
# Options for pandas: set the display limit to 30 columns
pd.options.display.max_columns = 30
# Display all cell outputs (every expression in a cell, not only the last one)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# Offline plotting helpers from plotly
from plotly.offline import iplot, init_notebook_mode
# Put cufflinks and the plotly notebook integration into offline mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm this is the intended trade-off.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Set global theme
# NOTE(review): world_readable=True presumably only affects plots published
# online; confirm it is intentional given that offline mode is used here.
cufflinks.set_config_file(world_readable=True, theme='pearl')
# Fetch the train/test splits through the project's dataset loader.
from src.load_datasets import load_datasets
train_data, test_data = load_datasets()
# NOTE(review): `tfds` and `humanize_label` are not imported or defined in the
# visible part of this file - confirm they are in scope before this cell runs.
# Build a DataFrame with 'text' and 'type' columns from the training split.
train_df = pd.DataFrame(
    data=tfds.as_numpy(train_data),
    columns=['text', 'type'],
)
# Replace every raw label with the value returned by humanize_label.
train_df['type'] = train_df['type'].apply(func=humanize_label)
# Preview the first five rows (the default for head()).
train_df.head(5)
N/A% (0 of 1600) | | Elapsed Time: 0:00:00 ETA: --:--:--
Start reading dataset from ./data/training.1600000.processed.noemoticon.csv
100% (1600 of 1600) |####################| Elapsed Time: 0:07:46 Time: 0:07:46
| | text | type |
|---|---|---|
| 0 | b"@switchfoot http://twitpic.com/2y1zl - Awww,... | bad |
| 1 | b"is upset that he can't update his Facebook b... | bad |
| 2 | b'@Kenichan I dived many times for the ball. M... | bad |
| 3 | b'my whole body feels itchy and like its on fi... | bad |
| 4 | b"@nationwideclass no, it's not behaving at al... | bad |
# Report how many rows the training frame holds.
print('Training dataset records', len(train_df))
# Draw a histogram of the 'type' column through the cufflinks .iplot accessor.
train_df['type'].iplot(
    title='Training data distribution',
    xTitle='Type',
    yTitle='count',
    kind='hist',
)
Training dataset records 1600000